1 Introduction

The objectives of this notebook are:

  • Visualize how well we could remove technical variability associated with the assay (scATAC or multiome). We should see a high degree of intermixing between the 2 assays.

  • Plot several markers of doublets: scRNAseq doublets determined by multiome, scrublet predictions, number of features, Chromatin Signature, etc. The broader objective of level 2 is to eliminate most remaining doublets and poor-quality cells, as we will discuss in future notebooks. These visualizations will allow us to explore this. We will also plot their location in the UMAP obtained at level 1.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(tidyverse)
library(reshape2)
library(ggpubr)

2.2 Parameters

# Paths
path_to_obj <- str_c(
  here::here("scATAC-seq/results/R_objects/level_3/"),
  params$cell_type,
  "/",
  params$cell_type,
  "_integrated_level_3.rds",
  sep = ""
)

path_to_obj_RNA <- str_c(
  here::here("scRNA-seq/3-clustering/3-level_3/"),
  params$cell_type,
  "_clustered_level_3_with_pre_freeze.rds",
  sep = ""
)

path_to_doublets <- here::here("scRNA-seq/3-clustering/2-level_2/tmp/doublets_multiome_df_all.rds")

# Functions
source(here::here("scRNA-seq/bin/utils.R"))

# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "chocolate1", "coral2", "blueviolet",
                   "brown1", "darkmagenta", "deepskyblue1", "dimgray",
                   "deeppink1", "green", "lightgray", "hotpink1",
                   "indianred4", "khaki", "mediumorchid2", "gold", "gray")

2.3 Load data

# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 270784 features across 23893 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 262136 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
seurat_RNA <- readRDS(path_to_obj_RNA)
seurat_RNA
## An object of class Seurat 
## 37378 features across 81653 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
  rownames_to_column(var = "cell_barcode") %>%
  dplyr::filter(assay == "multiome") %>%
  dplyr::select("cell_barcode", "seurat_clusters")

colnames(tonsil_RNA_annotation) <- c("cell_barcode", "seurat_clusters_RNA")

2.3.1 Visualization of the data

p1 <- DimPlot(seurat,
  pt.size = 0.1)

p2 <- DimPlot(seurat_RNA,
  group.by = "seurat_clusters",
  pt.size = 0.1,cols = color_palette)

p1

p2

3 Assess integration

3.1 Removal of technical variability

p_assay <- plot_split_umap(seurat, var = "assay")
p_assay

4 Spot potential doublets

4.1 Multiome scRNA-seq data

4.1.1 Doublets detected in scRNA-seq data from Multiome.

Here, we can see the count of doublets detected by cell-type. Note that for the level1 annotation there are not doublets detected in the Naive and Memory B cell cluster.

multiome_doublets <- readRDS(path_to_doublets)

dfm = melt(table(multiome_doublets$cell_type))
dfm$value = as.numeric(as.character(dfm$value))
ggbarplot(dfm, x = "Var1", y = "value",
          fill = "Var1",   
          palette = "jco",
          sort.val = "desc",         
          sort.by.groups = FALSE,    
          x.text.angle = 90          
          )

doublets_cells <- colnames(seurat)[which(colnames(seurat) %in% multiome_doublets$barcode)]
length(doublets_cells)
## [1] 0

4.2 Visualize the projection of the doublets cells onto the scATAC-seq UMAP

DimPlot(
  seurat, reduction = "umap",
  cols.highlight = "darkred", cols= "grey",
  cells.highlight= doublets_cells,
  pt.size = 0.1
)

## Scrublet prediction

# Scrublet
DimPlot(seurat, group.by = "scrublet_predicted_doublet_atac")

table(seurat$scrublet_predicted_doublet_atac)
## 
## FALSE  TRUE 
## 22375  1518

4.3 QC metrics

qc_vars <- c(
  "nCount_peaks",
  "nFeature_peaks",
  "nucleosome_signal",
  "TSS.enrichment"
)
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, features = x, max.cutoff = "q95")
  p
})
qc_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

4.4 Annotation probability

qc_vars <- c(
  "annotation_prob")
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, features = x)
  p
})
qc_gg
## [[1]]

4.5 Chromatin Signature

qc_vars <- c("NBC.MBC", "GCBC", "PC", "CD4.T", "Cytotoxic")
qc_gg <- purrr::map(qc_vars, function(x) {
  p <- FeaturePlot(seurat, feature = x, 
                   max.cutoff = 4, min.cutoff = -4) + 
    scale_color_viridis_c(option = "magma")
  p
})
qc_gg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

4.6 Projection of the scRNA-seq clusters to scATAC-seq UMAP

Annotation level 3 for scATAC will be defined “a priori” as unannotated and the scRNA annotation will be transfered to the scATAC-multiome cells based on the same cell barcode.

cell_testing <- c("NBC_MBC", "GCBC", "PC", "CD4_T","Cytotoxic")

if (unique(seurat$annotation_level_1) %in% cell_testing){
  seurat_df <- data.frame(cell_barcode = colnames(seurat)[seurat$assay == "scATAC"])
  seurat_df$seurat_clusters_RNA <- "unannotated"
  
  df_all <- rbind(tonsil_RNA_annotation,seurat_df)
  rownames(df_all) <- df_all$cell_barcode
  df_all <- df_all[colnames(seurat), ]
  
  seurat$seurat_clusters_RNA <- df_all$seurat_clusters_RNA
  seurat@meta.data$annotation_prob  <- 1
  melt(table(seurat$seurat_clusters_RNA))
  table(is.na(seurat$seurat_clusters_RNA))
  
  seurat_multiome <-  subset(seurat, assay == "multiome")

  p3 <- DimPlot(seurat_multiome, pt.size = 0.7, group.by = "seurat_clusters_RNA",cols = color_palette)
  p3
}

p2 + p3

4.7 UMAP level 1

umap_level_1 <- FeatureScatter(
  seurat,
  "UMAP_1_level_1",
  "UMAP_2_level_1",
  group.by = "annotation_level_1"
)
umap_level_1 <- umap_level_1 +
  theme(
    #legend.position = "none",
    plot.title = element_blank()
  )
umap_level_1

5 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] es_ES.UTF-8/UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Signac_1.1.0.9000 ggpubr_0.4.0      reshape2_1.4.4    forcats_0.5.0     stringr_1.4.0     dplyr_1.0.2       purrr_0.3.4       readr_1.4.0       tidyr_1.1.2       tibble_3.0.4      ggplot2_3.3.2     tidyverse_1.3.0   Seurat_3.9.9.9010 BiocStyle_2.16.1 
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1              SnowballC_0.7.0             rtracklayer_1.48.0          GGally_2.0.0                bit64_4.0.5                 knitr_1.30                  irlba_2.3.3                 DelayedArray_0.14.0         data.table_1.13.2           rpart_4.1-15                RCurl_1.98-1.2              AnnotationFilter_1.12.0     generics_0.1.0              BiocGenerics_0.34.0         GenomicFeatures_1.40.1      cowplot_1.1.0               RSQLite_2.2.1               RANN_2.6.1                  future_1.20.1               bit_4.0.4                   spatstat.data_2.1-0         xml2_1.3.2                  lubridate_1.7.9             httpuv_1.5.4                ggsci_2.9                   SummarizedExperiment_1.18.1 assertthat_0.2.1            xfun_0.18                   hms_0.5.3                   evaluate_0.14               promises_1.1.1              fansi_0.4.1                 progress_1.2.2              dbplyr_1.4.4                readxl_1.3.1                igraph_1.2.6                DBI_1.1.0                   htmlwidgets_1.5.2           reshape_0.8.8               stats4_4.0.3                ellipsis_0.3.1              backports_1.2.0            
##  [43] bookdown_0.21               biomaRt_2.44.4              deldir_0.2-3                vctrs_0.3.4                 Biobase_2.48.0              here_1.0.1                  ensembldb_2.12.1            ROCR_1.0-11                 abind_1.4-5                 withr_2.3.0                 ggforce_0.3.2               BSgenome_1.56.0             checkmate_2.0.0             sctransform_0.3.1           GenomicAlignments_1.24.0    prettyunits_1.1.1           goftest_1.2-2               cluster_2.1.0               lazyeval_0.2.2              crayon_1.3.4                labeling_0.4.2              pkgconfig_2.0.3             tweenr_1.0.1                GenomeInfoDb_1.24.0         nlme_3.1-150                ProtGenerics_1.20.0         nnet_7.3-14                 rlang_0.4.11                globals_0.13.1              lifecycle_0.2.0             miniUI_0.1.1.1              BiocFileCache_1.12.1        modelr_0.1.8                rsvd_1.0.3                  dichromat_2.0-0             cellranger_1.1.0            rprojroot_2.0.2             polyclip_1.10-0             matrixStats_0.57.0          lmtest_0.9-38               graph_1.66.0                ggseqlogo_0.1              
##  [85] Matrix_1.3-2                carData_3.0-4               zoo_1.8-8                   reprex_0.3.0                base64enc_0.1-3             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                KernSmooth_2.23-17          Biostrings_2.56.0           blob_1.2.1                  parallelly_1.21.0           jpeg_0.1-8.1                rstatix_0.6.0               S4Vectors_0.26.0            ggsignif_0.6.0              scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  ica_1.0-2                   zlibbioc_1.34.0             compiler_4.0.3              RColorBrewer_1.1-2          fitdistrplus_1.1-1          Rsamtools_2.4.0             cli_2.1.0                   XVector_0.28.0              listenv_0.8.0               patchwork_1.1.0             pbapply_1.4-3               ps_1.4.0                    htmlTable_2.1.0             Formula_1.2-4               MASS_7.3-53                 mgcv_1.8-33                 tidyselect_1.1.0            stringi_1.5.3               yaml_2.2.1                  askpass_1.1                 latticeExtra_0.6-29        
## [127] ggrepel_0.8.2               grid_4.0.3                  VariantAnnotation_1.34.0    fastmatch_1.1-0             tools_4.0.3                 future.apply_1.6.0          parallel_4.0.3              rio_0.5.16                  rstudioapi_0.12             lsa_0.73.2                  foreign_0.8-80              gridExtra_2.3               farver_2.0.3                Rtsne_0.15                  digest_0.6.27               BiocManager_1.30.10         shiny_1.5.0                 Rcpp_1.0.5                  GenomicRanges_1.40.0        car_3.0-10                  broom_0.7.2                 later_1.1.0.1               RcppAnnoy_0.0.16            OrganismDbi_1.30.0          httr_1.4.2                  AnnotationDbi_1.50.3        ggbio_1.36.0                biovizBase_1.36.0           colorspace_2.0-0            rvest_0.3.6                 XML_3.99-0.3                fs_1.5.0                    tensor_1.5                  reticulate_1.18             IRanges_2.22.1              splines_4.0.3               RBGL_1.64.0                 uwot_0.1.8.9001             RcppRoll_0.3.0              spatstat.utils_2.1-0        plotly_4.9.2.1              xtable_1.8-4               
## [169] jsonlite_1.7.1              spatstat_1.64-1             R6_2.5.0                    Hmisc_4.4-1                 pillar_1.4.6                htmltools_0.5.1.1           mime_0.9                    glue_1.4.2                  fastmap_1.0.1               BiocParallel_1.22.0         codetools_0.2-17            lattice_0.20-41             curl_4.3                    leiden_0.3.5                zip_2.1.1                   openxlsx_4.2.3              openssl_1.4.3               survival_3.2-7              rmarkdown_2.5               munsell_0.5.0               GenomeInfoDbData_1.2.3      haven_2.3.1                 gtable_0.3.0